
#Load the merged recall data; the .RData file is expected to provide `Data`
load("Recalls_merged_2019.RData")
Data$kenteken <- as.character(Data$kenteken)

#Remove duplicates: a plate that occurs more than once is dropped entirely
#(every occurrence, not only the repeats).
#kentekendupl lists the plates of the repeated rows; it is removed further down.
kentekendupl <- subset(Data, duplicated(kenteken), select = kenteken)
Data <- subset(Data, !(kenteken %in% kentekendupl$kenteken))

#Load the plate-level characteristics; the .RData file is expected to provide `Link_Char`
load("Link_Char.RData")
#Keep only plates present in Data, and only make (merk) and commercial name (handelsbenaming)
Link_Char <- Link_Char[Link_Char$kenteken %in% Data$kenteken,
                       c("kenteken", "merk", "handelsbenaming")]
#Left join: every car in Data is kept, unmatched cars get NA make/name
#NOTE(review): a plate repeated in Link_Char would duplicate rows here - confirm it is unique
Data <- merge(x = Data, y = Link_Char, by = "kenteken", all.x = TRUE)

#Prepare for counting variables: one unit per car, summed per model further down
Data$num <- 1

#Is the car recalled AND was the recall published from Nov 2017 onwards?
#A car counts as recalled when it has a recall reference code (referentiecoderdw)
Data$rec <- 0
Data$rec[!is.na(Data$referentiecoderdw) & Data$publicatiedatumrdw >= "2017-11-01"] <- 1

#Is the car recalled and was the recall published BEFORE Nov 2017?
Data$rec_bef <- 0
Data$rec_bef[!is.na(Data$referentiecoderdw) & Data$publicatiedatumrdw < "2017-11-01"] <- 1

#Generate model name as make_commercialname
Data$id_model <- paste(Data$merk, Data$handelsbenaming, sep = "_")
#paste() never yields "" here: a missing make/name becomes "NA_NA", "NA_x" or "_",
#so the later `id_model != ""` test ("model name present") could not exclude anything
#and all cars without a model name were pooled into one artificial model.
#Blank the id whenever either component is missing or empty so that test works.
Data$id_model[Data$merk %in% c(NA, "") | Data$handelsbenaming %in% c(NA, "")] <- ""


#Per model: total number of cars (num), cars recalled from Nov 2017 (rec)
#and cars recalled before Nov 2017 (rec_bef)
Temp <- aggregate(cbind(num, rec, rec_bef) ~ id_model, data = Data, FUN = sum)

#A model is kept (ok) when all of the following hold:
#1) Model name present
#2) At least one recalled car
#3) Fewer recalled cars than cars in total (within-model variation)
#4) No cars recalled before Nov 2017
Temp$ok <- with(Temp, id_model != "" & rec > 0 & rec < num & rec_bef == 0)
table(Temp$ok)
#365 models with recalls and within variation and without cars with earlier recalls
#(count as recorded by the author from an earlier run)

#Names of the models that passed the checks
vartype <- as.character(Temp$id_model[Temp$ok])
#Model identifier on the car level (plain copy of id_model)
Data$typevar <- as.character(Data$id_model)

#Does the car belong to a model with sufficient variation in recalls?
Data$var_type <- Data$typevar %in% vartype

table(Data$var_type)
#About 343031 cars (as recorded from an earlier run)

table(Data$var_type & Data$rec == 1)
#of which 68111  (19.85%) are recalled (as recorded from an earlier run)

#Remove unnecessary data
Data$num <- NULL
rm(vartype, Temp, kentekendupl)

#Get only cars belonging to models with sufficient variation in recalls
Split <- subset(Data, var_type)
rm(Data)

#Monthly sequence covering the panel window (first day of each month)
panel_months <- seq(as.Date("2017/11/1"), as.Date("2019/03/01"), "months")

#Base panel: one row for every car (plate) and every month in the window
Panel_model <- expand.grid(kenteken = Split$kenteken, date = panel_months)
rm(panel_months)

#Car-level attributes and registration dates.
#Both dates are moved to the first of their month so they compare cleanly with `date`.
#day<- is presumably lubridate's, attached by the calling environment - confirm
Temp <- Split[, c("kenteken", "id_model", "typegoedkeuringsnummer", "variant",
                  "uitvoering", "datumeersteafgiftenederland", "datumeerstetoelating")]
day(Temp$datumeersteafgiftenederland) <- 1
day(Temp$datumeerstetoelating) <- 1
#Attach the attributes to every car-month
Panel_model <- merge(Panel_model, Temp, by = "kenteken", all.x = TRUE)
#New cars already removed in 1d (original note; presumably an earlier pipeline step)
rm(Temp)

#Order by plate and date
Panel_model <- Panel_model[order(Panel_model$kenteken, Panel_model$date), ]


#Remove car-months after the car was exported abroad
Temp <- Split[, c("kenteken", "exported_last")]
#Shift the recorded date back by one month and snap it to the first of the month;
#per the original note the month is then the month the change took place, not the month after
Temp$exported_last <- as.Date(Temp$exported_last, format = "%Y-%m-%d") %m-% months(1)
day(Temp$exported_last) <- 1
Panel_model <- merge(Panel_model, Temp, by = "kenteken", all.x = TRUE, all.y = FALSE)
#Keep a car-month only if the car was never exported (no export date)
#or the month is not later than the (shifted) export month
Panel_model <- subset(Panel_model, is.na(exported_last) | date <= exported_last)



#Get resale dates (datumtenaamstelling_all holds the car's registration-transfer dates as text)
Temp <- subset(Split, select = c("kenteken", "datumtenaamstelling_all"))
Panel_model <- merge(Panel_model, Temp, by = "kenteken", all.x = TRUE, all.y = FALSE)

#Mark whether car is sold in a given month in the panel:
#resale = 1 when the text contains the panel month written as month/year.
#The month is anchored with digit look-arounds: a bare "1/2018" would also match
#inside "11/2018" (and "2/..." inside "12/..."), wrongly flagging January/February.
#An optional leading zero is accepted, so "1/2018" and "01/2018" both match.
#NOTE(review): assumes the dates are stored as .../month/year - confirm against the data
Panel_model$resale <- 0
i <- str_detect(
  Panel_model$datumtenaamstelling_all,
  paste0("(?<![0-9])0?", month(Panel_model$date), "/", year(Panel_model$date), "(?![0-9])")
)
#Rows without any transfer text give NA in i and keep resale = 0
Panel_model$resale[i] <- 1



#Get recall dates and recall fixing dates, snapped to the first of their month
Temp <- Split[, c("kenteken", "referentiecoderdw", "recall_new", "recall_fixed")]
Panel_model <- merge(Panel_model, Temp, by = "kenteken", all.x = TRUE)
day(Panel_model$recall_new) <- 1
day(Panel_model$recall_fixed) <- 1

#Dummy: 1 in the month of the recall
i <- !is.na(Panel_model$recall_new) & Panel_model$recall_new == Panel_model$date
Panel_model$rec_new <- as.numeric(!is.na(i) & i)
#Dummy: 1 from the month of the recall onwards
i <- !is.na(Panel_model$recall_new) & Panel_model$recall_new <= Panel_model$date
Panel_model$rec_new_t <- as.numeric(!is.na(i) & i)


#Months since the recall, counting the recall month itself as 1.
#Cars without a recall date get 0.
Panel_model$rec_new_dist <- with(
  Panel_model,
  1 + (year(date) * 12 + month(date)) - (year(recall_new) * 12 + month(recall_new))
)
Panel_model$rec_new_dist[is.na(Panel_model$rec_new_dist)] <- 0
#Signed version: keeps negative values for months before the recall
Panel_model$rec_new_dist_both <- Panel_model$rec_new_dist
#One-sided version: everything before the recall is set to 0
Panel_model$rec_new_dist <- pmax(Panel_model$rec_new_dist, 0)

#Distance in 3-month groups: 0 = none, 1 = months 1-3, 2 = 4-6, 3 = 7-9,
#4 = 10-12, 5 = 13 or more (the distance is a whole number of months)
Panel_model$rec_new_dist_cat <- as.numeric(
  findInterval(Panel_model$rec_new_dist, c(1, 4, 7, 10, 13))
)
table(Panel_model$rec_new_dist_cat)

#Dummy: 1 in the month the recall was fixed
i <- !is.na(Panel_model$recall_fixed) & Panel_model$recall_fixed == Panel_model$date
Panel_model$rec_fixed <- as.numeric(!is.na(i) & i)
#Dummy: 1 from the month of fixing onwards
i <- !is.na(Panel_model$recall_fixed) & Panel_model$recall_fixed <= Panel_model$date
Panel_model$rec_fixed_t <- as.numeric(!is.na(i) & i)


#Generate id for type
#Panel_model$id_type<-as.numeric(as.factor(paste(Panel_model$typegoedkeuringsnummer, sep="_")))


#Order by model, plate and date.
#The column is spelled out in full: `Panel_model$id` only worked through partial
#matching of `$` onto id_model and would return NULL (breaking order()) as soon as
#a second column starting with "id" exists, e.g. the id_type above.
Panel_model <- Panel_model[order(Panel_model$id_model, Panel_model$kenteken, Panel_model$date), ]


Panel_model$kenteken <- as.character(Panel_model$kenteken)

#Drop obs if date<date car gets in NL
i <- Panel_model$datumeersteafgiftenederland > Panel_model$date
table(i, useNA = "always")
#Only rows where the comparison is FALSE are kept; rows where it is NA
#(no first-issue date) are dropped as well, because subset() treats NA as FALSE
Panel_model <- subset(Panel_model, i == FALSE)
rm(i)

#Generate variable on age of car: completed years between the month of
#first admission (datumeerstetoelating) and the panel month
Panel_model$age_car <- with(
  Panel_model,
  floor(((month(date) + year(date) * 12) -
           (month(datumeerstetoelating) + year(datumeerstetoelating) * 12)) / 12)
)



#Save in R format
save(Panel_model, file = "Panel_model.RData")
